library(tidyverse)
library(countrycode)
library(lubridate)
library(RColorBrewer)
library(plotly)
# Load the raw incident records, copy the incident country into a plain
# `country` column, and look up the continent for each country name.
df <- read_csv("refugeesAndMigrants.csv") %>%
  mutate(
    country = `country of incident`,
    continent = countrycode(
      sourcevar = country,
      origin = "country.name",
      destination = "continent"
    )
  )
## Warning in countrycode(sourcevar = df[["country"]], origin = "country.name", : Some values were not matched unambiguously: Hrvatska, unknown, Wales
# Grand total of the `number` column across every record, skipping NAs.
summarise(df, sum_number = sum(number, na.rm = TRUE))
## # A tibble: 1 x 1
## sum_number
## <dbl>
## 1 34361
# European records only, collapsed to one row per country and raw date string
# (`found dead`, exposed as `date`) with the summed `number` for that pair.
europe <- df %>%
  filter(continent == "Europe") %>%
  select(-`country of incident`, -`source`) %>%
  group_by(country, date = `found dead`) %>%
  summarise(
    sum_number = sum(number, na.rm = TRUE),
    .groups = "drop"
  )
# Rows whose raw date parses as day-month-year: keep them and reduce the
# parsed date to its calendar year.
d0 <- europe %>%
  mutate(
    clean_date = dmy(date),
    clean_year = year(clean_date)
  ) %>%
  filter(!is.na(clean_date)) %>%
  select(country, sum_number, clean_year)
## Warning: Problem with `mutate()` input `clean_date`.
## ℹ 159 failed to parse.
## ℹ Input `clean_date` is `dmy(date)`.
# Rows whose raw date could NOT be parsed as day-month-year: recover a year
# from the first number found in the raw string instead.
#   * a value above 1000 is used as the year as-is,
#   * a value above 18 (and at most 1000) is offset by 1900,
#   * a value of 18 or below is offset by 2000.
# The mapping is vectorised, so it also works when there are no such rows and
# yields an NA year (instead of aborting the script) when no number is found.
d1 <- europe %>%
  mutate(clean_date = dmy(date)) %>%
  filter(is.na(clean_date)) %>%
  mutate(
    check = parse_number(date),
    clean_year = case_when(
      check > 1000 ~ check,
      check > 18 ~ check + 1900,
      check <= 18 ~ check + 2000
    )
  ) %>%
  select(country, sum_number, clean_year)
## Warning: Problem with `mutate()` input `clean_date`.
## ℹ 159 failed to parse.
## ℹ Input `clean_date` is `dmy(date)`.
# Stack the parsed-date rows and the recovered-year rows, sorted by country
# and year; the result stays grouped by (country, clean_year).
europe_1 <- rbind(d0, d1) %>%
  arrange(country, clean_year) %>%
  group_by(country, clean_year)

# One row per (year, country) holding the summed deaths, sorted by year and
# then country.
europe_2 <- europe_1 %>%
  group_by(clean_year, country) %>%
  summarise(
    sum_max_amount = sum(sum_number),
    .groups = "drop"
  ) %>%
  arrange(clean_year, country)
# Country names to guarantee a row for, even when they have no records.
europeanUnion <- c(
  "Austria", "Belgium", "Bulgaria", "Croatia", "Cyprus", "Czech Rep.",
  "Denmark", "Estonia", "Finland", "France", "Germany", "Greece",
  "Hungary", "Ireland", "Italy", "Latvia", "Lithuania", "Luxembourg",
  "Malta", "Netherlands", "Poland", "Portugal", "Romania", "Slovakia",
  "Slovenia", "Spain", "Sweden", "United Kingdom"
)
eu <- data.frame(europeanUnion = europeanUnion)

# Full join: listed countries absent from europe_2 are added with NA year and
# NA deaths; countries already present are left unchanged.
europe_2 <- europe_2 %>%
  full_join(eu, by = c("country" = "europeanUnion"))
# Build the table behind the animated map: one row per country and year with
# that year's deaths and a running (cumulative) total per country.
europe_3 <- europe_2 %>%
group_by(country) %>%
# Add a row for every year 1993-2018 that a country is missing.
complete(clean_year = 1993:2018) %>%
select(country, Year = clean_year, sum_max_amount) %>%
arrange(country) %>%
# Turn every NA into 0 so cumsum() does not propagate NA. This also turns an
# NA Year (rows added by the earlier full join with the EU list) into 0;
# those rows are removed by filter(Year != 0) further down.
replace(is.na(.), 0) %>%
# Still grouped by country, so the running total restarts for each country.
# NOTE(review): the total follows the current row order; presumably years are
# ascending within each country after complete() - confirm.
mutate(Cumulative_Deaths = cumsum(sum_max_amount))
# ISO3 country code, used as the map location key.
europe_3$code <- countrycode(europe_3$country,"country.name", "iso3c")
europe_3 <- europe_3 %>%
# Drop the placeholder rows whose Year was NA (now 0).
filter(Year != 0) %>%
# Tooltip text: country name, newline, cumulative death count.
mutate(hover=paste0(country, "\n",Cumulative_Deaths, " Deaths"))
# Persist the table, then reload it from disk for plotting.
write_csv(europe_3, "europe_3.csv")
europe_3 <- read_csv("europe_3.csv")
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## country = col_character(),
## Year = col_double(),
## sum_max_amount = col_double(),
## Cumulative_Deaths = col_double(),
## code = col_character(),
## hover = col_character()
## )
library(plotly)

# `g` is not referenced by the plotting call below, which passes its own
# geo list to layout().
g <- list(scope = "europe")

# Animated choropleth of Europe: one frame per Year, countries located by
# ISO3 code and shaded by cumulative deaths on a fixed 0..max colour range.
graph <- europe_3 %>%
  plot_geo(frame = ~Year) %>%
  add_trace(
    locations = ~code,
    z = ~Cumulative_Deaths,
    color = ~Cumulative_Deaths,
    colorscale = "Purples",
    zmin = 0,
    zmax = max(europe_3$Cumulative_Deaths),
    text = ~hover,
    hoverinfo = "text"
  ) %>%
  colorbar(title = "") %>%
  layout(
    title = list(
      text = "\nTotal Cumulative Migrant Deaths in Europe\n(1993-2018)",
      x = 0.01
    ),
    margin = list(b = 50, l = 50),
    geo = list(
      scope = "europe",
      lataxis = list(range = c(30, 80)),
      lonaxis = list(range = c(-10, 40))
    )
  ) %>%
  config(displayModeBar = FALSE)

graph
# Not working; not used.
#start_date <- as.Date('1993/1/1')
#date_range <- seq(start_date, by = 'year', length.out = 26)
#date_range <- year(date_range)
#date_range <- as.data.frame(date_range)
#europe_3 <- full_join(europe_2, date_range, by = c("clean_year"="date_range"))
## not working
#europe_1 %>%
# group_by(country) %>%
# mutate(cum_death = sum_number + lag(sum_number, default=first(sum_number)))
# Second graph: cause of death by country
# European records reduced to the columns needed for the cause-of-death
# analysis, with the free-text cause lower-cased for keyword matching.
text <- df %>%
  filter(continent == "Europe") %>%
  transmute(
    date = `found dead`,
    deaths = number,
    country,
    continent,
    description = tolower(`cause of death`)
  )
# Keyword lists; each is later collapsed into a single "a|b|c" pattern and
# matched against the lower-cased cause-of-death description.
sea <- c("boat", "drown", "drowning", "sea", "ocean")
vehicle <- c(
  "stowaway", "car", "vehicle", "plane",
  "airplane", "train", "truck", "van"
)
fire <- c("arson", "fire", "gas", "gasoline")
suicide <- c(
  "suicide", "hanged", "hang", "jump", "drugs",
  "drug", "hungerstrike", "suffocate", "suffocated"
)
murder <- c("fight", "murder", "murdered")
# The suicide keywords followed by the murder keywords, in that order.
suicide_muder <- c(suicide, murder)
# One flag column per cause: the cause label when any of that cause's
# keywords occurs in the description, otherwise "Other".
#
# Fix: the murder flag used to be built from `suicide`. On top of the
# copy-paste slip, `suicide` was already a column of the data at that point,
# so the pattern came from the column values and never matched the lower-cased
# descriptions. It now uses the `murder` keywords.
# Each flag is created in the same mutate() that first reads the keyword
# vector of the same name, so the name still refers to the keyword vector
# when its pattern is built.
text_1 <- text %>%
  mutate(
    sea = case_when(
      grepl(paste(sea, collapse = "|"), description) ~ "Sea",
      TRUE ~ "Other"
    ),
    vehicle = case_when(
      grepl(paste(vehicle, collapse = "|"), description) ~ "Vehicle",
      TRUE ~ "Other"
    ),
    fire = case_when(
      grepl(paste(fire, collapse = "|"), description) ~ "Fire",
      TRUE ~ "Other"
    ),
    suicide = case_when(
      grepl(paste(suicide, collapse = "|"), description) ~ "Suicide",
      TRUE ~ "Other"
    ),
    murder = case_when(
      grepl(paste(murder, collapse = "|"), description) ~ "Murder",
      TRUE ~ "Other"
    )
  )
## Stopped here: still have to multiply deaths by the number of occurrences for each cause of death
# Assign each record a single cause (first matching keyword list wins, in the
# order Sea, Vehicle, Fire, Suicide, Murder, else "Other"), then count how
# many records share the same cause, country and death count.
# The result stays grouped by (cause, country).
text_2 <- text %>%
  mutate(
    cause = case_when(
      grepl(paste0(sea, collapse = "|"), description) ~ "Sea",
      grepl(paste0(vehicle, collapse = "|"), description) ~ "Vehicle",
      grepl(paste0(fire, collapse = "|"), description) ~ "Fire",
      grepl(paste0(suicide, collapse = "|"), description) ~ "Suicide",
      grepl(paste0(murder, collapse = "|"), description) ~ "Murder",
      TRUE ~ "Other"
    )
  ) %>%
  group_by(cause, country, deaths) %>%
  summarise(n = n(), .groups = "drop_last")

# Deaths contributed by each (cause, country, deaths) row: count times size.
graph2 <- mutate(text_2, cumulative_deaths = deaths * n)
# workshopping graph
# Horizontal bars of deaths by cause for four countries, one facet each;
# only the causes named in ylim() are kept on the axis.
graph2 %>%
  filter(country %in% c("Spain", "Italy", "Greece", "France")) %>%
  group_by(country, cause) %>%
  arrange(desc(cumulative_deaths)) %>%
  ggplot(
    aes(
      x = cumulative_deaths,
      y = reorder(cause, cumulative_deaths),
      label = cumulative_deaths
    )
  ) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~country, ncol = 2, scales = "free") +
  labs(
    title = "Cause of Death by Country",
    x = "Frequency",
    y = "Cause of Death"
  ) +
  theme(
    legend.position = "none",
    plot.title = element_text(vjust = 2, hjust = 0.5)
  ) +
  ylim("Vehicle", "Other", "Sea")
## Warning: Removed 19 rows containing missing values (position_stack).

# workshopping graph in percent - stopped here
# Same four countries, but the bars show the proportion of rows per cause
# (count stat with a single group), with the x axis formatted as percent.
check <- graph2 %>%
  filter(country %in% c("Spain", "Italy", "Greece", "France")) %>%
  group_by(country, cause) %>%
  arrange(desc(cumulative_deaths)) %>%
  ggplot(aes(x = ..prop.., y = cause, group = 1)) +
  geom_bar(stat = "count", show.legend = FALSE) +
  facet_wrap(~country, ncol = 2, scales = "free") +
  labs(
    title = "Cause of Death by Country",
    x = "Percentage",
    y = "Cause of Death"
  ) +
  theme(
    legend.position = "none",
    plot.title = element_text(vjust = 2, hjust = 0.5)
  ) +
  ylim("Suicide", "Murder", "Fire", "Vehicle", "Other", "Sea") +
  scale_x_continuous(labels = scales::percent_format()) +
  coord_cartesian(xlim = c(0, 1))

check

# Each row's share (in percent) of its country's total deaths, largest share
# first; the printed result is grouped by (cause, country).
graph2 %>%
  filter(country %in% c("Spain", "Italy", "Greece", "France")) %>%
  group_by(country) %>%
  mutate(percent = cumulative_deaths / sum(cumulative_deaths) * 100) %>%
  arrange(desc(percent)) %>%
  group_by(cause, country)
## # A tibble: 223 x 6
## # Groups: cause, country [23]
## cause country deaths n cumulative_deaths percent
## <chr> <chr> <dbl> <int> <dbl> <dbl>
## 1 Sea Greece 360 1 360 19.1
## 2 Vehicle France 1 81 81 10.4
## 3 Sea Italy 268 1 268 8.27
## 4 Sea Italy 250 1 250 7.71
## 5 Sea Spain 1 242 242 7.58
## 6 Sea France 19 3 57 7.32
## 7 Sea France 26 2 52 6.68
## 8 Sea Italy 100 2 200 6.17
## 9 Sea France 23 2 46 5.91
## 10 Sea Italy 177 1 177 5.46
## # … with 213 more rows
# Deaths per cause within each of the four countries, plus each cause's
# share of its country's total (percent, two decimals).
# `.groups = "drop_last"` is spelled out because the percentages rely on the
# result still being grouped by country: prop.table() must run per country,
# not over the whole table. Stating it also silences the summarise() grouping
# message and matches the explicit `.groups` used earlier in this script.
graph3 <- graph2 %>%
  filter(country %in% c("Spain", "Italy", "Greece", "France")) %>%
  group_by(country, cause) %>%
  summarize(
    sum_deaths = sum(cumulative_deaths),
    .groups = "drop_last"
  ) %>%
  mutate(percent = round(prop.table(sum_deaths) * 100, 2))
graph3
## # A tibble: 23 x 4
## # Groups: country [4]
## country cause sum_deaths percent
## <chr> <chr> <dbl> <dbl>
## 1 France Fire 29 3.72
## 2 France Murder 5 0.64
## 3 France Other 40 5.13
## 4 France Sea 609 78.2
## 5 France Suicide 11 1.41
## 6 France Vehicle 85 10.9
## 7 Greece Fire 4 0.21
## 8 Greece Murder 1 0.05
## 9 Greece Other 202 10.7
## 10 Greece Sea 1595 84.8
## # … with 13 more rows
# Save the per-country cause shares before `cause` is turned into a factor.
write_csv(graph3, "graph3.csv")

# Ordered factor so the fill colours below line up with the causes in this
# order: Sea, Other, Vehicle, Fire, Murder, Suicide.
graph3$cause <- factor(
  graph3$cause,
  levels = c("Sea", "Other", "Vehicle", "Fire", "Murder", "Suicide"),
  ordered = TRUE
)
my_colors <- c(
  "blue1", "#4DAF4A", "#FF7F00",
  "#E41A1C", "#FFFF33", "#984EA3"
)

# Faceted horizontal bar chart: percentage of deaths by cause, one panel per
# country, each bar labelled with its percentage.
barplot <- ggplot(graph3, aes(x = percent, y = cause, fill = cause)) +
  geom_col(show.legend = FALSE, col = "black") +
  geom_text(
    aes(label = paste0(percent, "%")),
    position = position_dodge(width = 1),
    hjust = -0.5,
    size = 3.5
  ) +
  facet_wrap(~country, ncol = 2) +
  scale_fill_manual(values = my_colors) +
  ylim("Suicide", "Murder", "Fire", "Vehicle", "Other", "Sea") +
  xlim(0, 100) +
  labs(
    title = "Cause of Death by Country\n(1993-2018)",
    x = "Percentage",
    y = "Cause of Death"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(
      family = "sans", hjust = 0.5, size = 18,
      margin = margin(0, 0, 10, 0)
    ),
    strip.text = element_text(size = 15),
    strip.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(colour = "black", fill = NA)
  )

barplot

## checking summary statistics
# Total sea deaths recorded for France, to compare against the graph3 table.
graph2 %>%
  filter(country == "France", cause == "Sea") %>%
  summarise(d = sum(cumulative_deaths))
## `summarise()` has grouped output by 'cause'. You can override using the `.groups` argument.
## # A tibble: 1 x 3
## # Groups: cause [1]
## cause country d
## <chr> <chr> <dbl>
## 1 Sea France 609
# Total deaths across the map table (sum of the per-year amounts).
summarise(europe_3, sum = sum(sum_max_amount))
## # A tibble: 1 x 1
## sum
## <dbl>
## 1 10041
# Total deaths across the European cause-of-death records. Missing counts are
# skipped, as in the other totals this figure is compared against, so one NA
# cannot turn the whole check into NA.
text %>%
  summarize(sum = sum(deaths, na.rm = TRUE))
## # A tibble: 1 x 1
## sum
## <dbl>
## 1 10041
# Total deaths recorded for Spain in the raw data. Missing counts are
# skipped, consistent with the overall total computed at the top of the
# script.
df %>%
  filter(country == "Spain") %>%
  summarize(deaths = sum(number, na.rm = TRUE))
## # A tibble: 1 x 1
## deaths
## <dbl>
## 1 3191